knitr document van Steensel lab

Introduction

Clone 5 with ~18 sgRNA target site integrations was cut and treated with 160 epigenetic drugs. The repair outcomes of that experiment will be visualized here. I previously processed the raw sequencing data, and calculated the repair outcomes. In this script, the main results plots will be generated.

Setup

Libraries

knitr::opts_chunk$set(echo = TRUE)
StartTime <-Sys.time()

# 8-digit Date tag:
Date <- substr(gsub("-","",Sys.time()),1,8) 
# libraries:
library(dplyr)
library(outliers)
library(ggplot2)
library(ggbeeswarm)
library(data.table)
library(ggrepel)
library(GGally)
library(DESeq2)
library(gghighlight)
library(platetools)
library(ggpubr)
library(tidyr)
library(Laurae)

Functions

Data import

Viability data

# First combine the data from the two replicates
# indel.data <- rbind(indel.data.r1, indel.data.r2)

# Add viability data to indel.data
viability.comp$id <- gsub("_[0-9]{3}$", "", viability.comp$id)
viability.rep1 <- viability.comp %>%
  dplyr::select(id, viability) %>%
  mutate(biorep = "rep1")
viability.rep2 <- viability.comp %>%
  dplyr::select(id, X) %>%
  setnames("X", "viability") %>% 
  mutate(biorep = "rep2")
viability.comp <- rbind(viability.rep1, viability.rep2)
viability.comp <- viability.comp %>%
  mutate(well = gsub(".*_([A-Z][0-9]{1,2}$)", "\\1", id),
         well = gsub("([A-Z])([0-9]{1}$)", "\\10\\2", well),
         tech = gsub(".*rep([0-9]{1}).*", "\\1", id),
         concentration = gsub("(.*)[a-z]{1}m_.*", "\\1", id),
         drug_plate = gsub(".*plate([0-9]{1}).*", "\\1", id))
viability.comp$replicate[viability.comp$biorep == "rep1"] <- "E177"
viability.comp$replicate[viability.comp$biorep == "rep2"] <- "E1504"
viability.comp <- viability.comp %>%
  dplyr::select(-biorep, -id)
id_df <- indel.data %>%
  dplyr::select(ID, well, tech, concentration, drug_plate, replicate) %>%
  unique()
viability.comp <- merge(viability.comp, id_df)
indel.data$concentration <- as.character(indel.data$concentration)

indel.data <- merge(viability.comp, indel.data) %>% 
  mutate(viability = ave(viability, plate, replicate, FUN = function(x) x/max(x)))

indel.data$concentration <- factor(indel.data$concentration, levels = c("100", "1", "10"))

indel.data2 <- indel.data
indel.data2$group <- "drug"
indel.data2$group[indel.data2$drug == "DMSO"] <- "DMSO"
indel.data2$group[indel.data2$drug == "PAO"] <- "PAO"
indel.data2$ID <- gsub("^E[0-9]{3,4}_", "", indel.data2$ID)

indel.data2 <- indel.data2 %>%
  dplyr::select(viability, replicate, barcode, ID, group, concentration, drug, target) %>%
  spread(key = replicate, value = viability)

sp <- ggscatter(indel.data2, x = "E1504", y = "E177",
                color = "group", size = 1.5, add = "reg.line",
   add.params = list(color = "blue", fill = "lightgray"), title = "viability episcreen rep 1 vs. rep 2",
   conf.int = TRUE, ylab = "rep 2", xlab = "rep 1")

sp + stat_cor(method = "pearson", label.x = 0, label.y = 0.15) + 
  geom_segment(x = 0.5, xend = 0.5, y = 0.5, yend = 1, linetype = "dashed") +
  geom_segment(x = 0.5, xend = 1, y = 0.5, yend = 0.5, linetype = "dashed") +
  facet_wrap(~concentration) +theme_bw() +
  scale_color_manual(values = c("#2a9d8f", "black", "#e76f51"))
## `geom_smooth()` using formula 'y ~ x'

# Explore differences between rep 1 & rep 2
indel.data2 <- indel.data2 %>%
  mutate(dif = E1504 / E177) %>%
  dplyr::select(dif, drug, target, concentration, E1504, E177) %>%
  unique()

indel.data2$changed <- "no"
indel.data2$changed[indel.data2$dif >= 2] <- "yes"

ggplot(indel.data2, aes(x = target, y = log2(dif))) +
  geom_quasirandom() +
  theme_bw() +
  ylab("log2(rep 1 viability / rep 2 viabilty)") +
  facet_wrap(~concentration) +
  coord_flip()

Most of the wells that had a drastic drop in viability in rep 2 are not represented in the indel data - so they are excluded from these plots

Data pre-processing

## save favorite colors
colors_diverse <- c("#264653", "#2a9d8f", "#e9c46a", "#f4a261", "#e76f51") 

## Make some density plots to get a feeling for the data
indel.data <- merge(indel.data, integrations)
indel.data$integration <- factor(indel.data$integration, levels = c("Integration 1", "Integration 2", "Integration 3", "Integration 4", "Integration 5",
                                                                    "Integration 6", "Integration 7", "Integration 8", "Integration 9", "Integration 10",
                                                                    "Integration 11", "Integration 12", "Integration 13", "Integration 14", 
                                                                    "Integration 15", "Integration 16", "Integration 17", "Integration 18"))

# Save file before to filtering
indel.data <- indel.data %>%
  setnames(c("ID"), c("condition"))
all.indel.data <- indel.data

# Remove barcodes with abnormal counts from analysis
# Density plot of reads.count using only PAO data -> these are the input reads
all.indel.data <- all.indel.data[!is.na(all.indel.data$barcode),]
ggplot(data = all.indel.data[all.indel.data$drug == c("DMSO"),] %>%
         dplyr::select(barcode, condition, read.count1.7, drug, replicate) %>%
         unique(), aes(x = read.count1.7, color = replicate, group = replicate)) + 
  geom_density() + 
  facet_wrap(~barcode, nrow = 6)+ 
  labs(title = "read count before cutting (PAO) and after cutting (DMSO)") + 
  theme_bw()+
  scale_color_manual(values = c("#2a9d8f", "black"))

# Remove two barcodes with abnormal indel read counts
indel.data <- indel.data[indel.data$barcode != "ACCCCTAAAGGCGCTG" &
                           indel.data$barcode != "CTCTTAATCGCTGCC",]

# Also remove "TGTCCCTTAGTACTTT"? - this one has a bit wider spread of DMSO indel scores (at least in rep 1)

# Remove 0, NAs, Infs
indel.data <- indel.data[is.finite(indel.data$MMEJscore),]
indel.data <- indel.data[indel.data$MMEJscore != 0,]

# Exclude noisy data - estimate fraction of what that is
ggplot(indel.data %>%
         dplyr::select(read.count1.7, replicate, barcode, condition, plate) %>%
         unique(), aes(color = replicate, x = read.count1.7)) +
  geom_density() +
  geom_vline(xintercept = 30) +
  facet_wrap(~plate) +
  xlim(0,500) +
  theme_bw()

# In some plates of rep 2 the reads peak at around 100 - is this a problem?

ggplot(indel.data %>%
         dplyr::select(read.count1.7, replicate, barcode, condition, plate) %>%
         unique(), aes(color = replicate, x = read.count1.7)) +
  geom_density() +
  geom_vline(xintercept = 30) +
  facet_wrap(~plate) +
  xlim(0,100) +
  theme_bw()

indel.data <- indel.data[indel.data$read.count1.7 > 30,]

# We want to exclude reads from cells that have reduced viability (set cut-off at 50% reduced viability)
indel.data <- indel.data[indel.data$viability > 0.5,]


# Optional: Exclude data completely (remove the remaining datapoint) if 2 out of 3 replicates were already excluded until now
## indel.data <- indel.data[indel.data$uniqueID %in% names(which(table(indel.data$uniqueID) > 1)), ]

DMSO indel scores

# Plot MMEJ scores at each barcode for the two bioreps
ggplot(indel.data %>% 
         dplyr::select(MMEJscore, condition, integration, drug, replicate) %>%
         filter(drug == "DMSO") %>%
         unique(), aes(x = integration, MMEJscore)) +
  geom_quasirandom() +
  theme_bw() +
  coord_flip() +
  facet_wrap(~replicate)

# Correlation DMSO MMEJ scores
indel.data$condition_2 <-  gsub("^E[0-9]{3,4}_", "", indel.data$condition)
indel.data.DMSO.rep <- indel.data %>% 
  dplyr::select(MMEJscore, condition_2, integration, drug, replicate, plate) %>%
  filter(drug == "DMSO") %>%
  dcast(condition_2 + integration + drug + plate ~ replicate, value.var = "MMEJscore")

sp <- ggscatter(indel.data.DMSO.rep, x = "E177", y = "E1504",
                size = 1.5, add = "reg.line",
   add.params = list(color = "blue", fill = "lightgray"), title = "MMEJscore rep 1 vs. rep 2",
   conf.int = TRUE, ylab = "rep 2", xlab = "rep 1")

sp + stat_cor(method = "pearson", label.x = 0, label.y = 0.6) + 
  facet_wrap(~plate) +theme_bw()
## `geom_smooth()` using formula 'y ~ x'

Data normalization - normalize indel counts

## Normalize the data

## Check if we have normally distributed DMSO data
qqnorm(indel.data$MMEJscore[indel.data$drug == "DMSO"])

qqnorm(indel.data$freqCut[indel.data$drug == "DMSO"])

# Plate normalization for cutting/repair efficiency
for (i in unique(indel.data$plate)) {
  for (j in unique(indel.data$barcode)) {
    for(k in unique(indel.data$replicate)) {
      dmso.eff.mean <- mean(indel.data$freqCut[indel.data$drug == "DMSO" & 
                                                 indel.data$plate ==  i & 
                                                 indel.data$barcode == j & 
                                                 indel.data$replicate == k])
  indel.data$freqCut_norm[indel.data$plate == i & indel.data$barcode == j & indel.data$replicate == k] <-
    indel.data$freqCut[indel.data$plate == i & indel.data$barcode == j & indel.data$replicate == k] / dmso.eff.mean
    }
  }
}


## MMEJscore z-score
for (i in unique(indel.data$drug)) {
  for (j in unique(indel.data$concentration)) {
    for (k in unique(indel.data$replicate)) {
    indel.data$MMEJ_zscore[indel.data$drug == i & indel.data$concentration == j & indel.data$replicate == k] <-
      (indel.data$MMEJscore[indel.data$drug == i & indel.data$concentration == j & indel.data$replicate == k] -
      mean(indel.data$MMEJscore[indel.data$drug == "DMSO" & indel.data$concentration == j & indel.data$replicate == k])) / 
      sd(indel.data$MMEJscore[indel.data$drug == "DMSO" & indel.data$concentration == j & indel.data$replicate == k])
    }
  }
} 

indel.data <- indel.data %>%
  mutate(MMEJ_zscore_mean_bio = ave(MMEJ_zscore, integration, drug, concentration, replicate, FUN = function(x) mean(x)),
         MMEJ_zscore_mean = ave(MMEJ_zscore_mean_bio, integration, drug, concentration, FUN = function(x) mean(x)))

indel.data <- indel.data %>%
  mutate(freqCut_norm_mean_bio = ave(freqCut_norm, integration, drug, concentration, replicate, FUN = function(x) mean(x)),
         freqCut_norm_mean = ave(freqCut_norm_mean_bio, integration, drug, concentration, FUN = function(x) mean(x)))


indel.data <- indel.data %>%
  mutate(MMEJ_zscore_mean_bio_drug = ave(MMEJ_zscore_mean_bio, drug, concentration, replicate, FUN = function(x) mean(x)),
         freqCut_norm_mean_bio_drug = ave(freqCut_norm_mean_bio, drug, concentration, replicate, FUN = function(x) mean(x)))

Drug effects per target group

# Density plot logratio of the controls
ggplot(data = indel.data[indel.data$drug == c("DMSO","DNA-PKi","Mirin"),], 
       aes(x = MMEJ_zscore, color = target)) + 
  theme_bw() +
  geom_density() + 
  xlab("MMEJ z-score") +
  facet_wrap(~replicate) +
  scale_color_manual(values = colors_diverse)

# Beeswarm plot of MMEJ score of the controls at all IPRs
ggplot(data = indel.data[indel.data$drug == "DMSO",], 
       aes(y = MMEJscore, x = integration)) + theme_bw() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1.0, vjust = 0.5)) + 
  geom_quasirandom(dodge.width = 0.75) +
  geom_boxplot(position = position_dodge(0.75), alpha = 0.7)+ xlab("integration") + 
  labs(title = "MMEJ score of DMSO-treated cells") +
  facet_wrap(~replicate)

# Beeswarm plot of MMEJ score of all drugs, highlighting controls
ggplot(data = indel.data %>%
         dplyr::select(MMEJ_zscore_mean_bio, sample, drug, concentration, integration, replicate) %>%
         unique(), 
       aes(y = MMEJ_zscore_mean_bio, x = 'x', color = sample)) + theme_bw() +
  geom_quasirandom(dodge.width = 0.75) +
  geom_boxplot(position = position_dodge(0.75), alpha = 0.4) +
  xlab("") + ylab("MMEJ score") +
  scale_color_manual(values = colors_diverse) +
  facet_grid(replicate~concentration)

# Integral of MMEJ-zscore density plot per target group
ggplot(data = indel.data, 
       aes(x = MMEJ_zscore)) +
  xlab("logratio") + ylab("acumulative value")+ stat_ecdf(aes(colour = target))+ theme_classic() +
  labs(title = "accumulative logratio of all targets") + facet_wrap(~replicate)

# Integral of freqCut density plot
ggplot(data = indel.data, 
       aes(x = freqCut)) +
  xlab("efficiency: normalized over plate") + stat_ecdf(aes(colour = target))+ theme_classic() +
  labs(title = "integral of efficiency density")

# Total amount of drugs per target group
ggplot(indel.data, aes(x = reorder(target,target, function(x)-length(x)))) +
  geom_bar()+theme_classic() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 12))+ 
  ylab("datapoints") + xlab("target group")+ labs(title = "datapoints per target group")

indel.data2 <- indel.data %>% 
  mutate(MMEJ_zscore_mean = ave(MMEJ_zscore_mean_bio, replicate, concentration, drug, FUN = function (x) mean(x))) %>%
  dplyr::select(MMEJ_zscore_mean, concentration, target, drug, replicate) %>%
  unique()
indel.data2$zscore_change <- "<1"
indel.data2$zscore_change[abs(indel.data2$MMEJ_zscore_mean)>0.5] <- "0.5-1.5"
indel.data2$zscore_change[abs(indel.data2$MMEJ_zscore_mean)>1.5] <- ">1.5"
indel.data2 <- indel.data2 %>%
  mutate(size = ave(target, target, zscore_change, FUN = function(x) length(x)),
         total_size = ave(target, target, FUN = function(x) length(x)))


ggplot(indel.data2 %>%
         dplyr::select(target, size, zscore_change, concentration, total_size, replicate) %>%
         unique(), aes(x = reorder(target, as.numeric(total_size)), 
                   y = as.numeric(size), 
                       fill = zscore_change)) + 
      geom_bar(stat="identity") +
    labs(fill = "z-score")+
  scale_fill_manual(values = colors_diverse[seq(1, length(colors_diverse), 2)])+
  coord_flip()+ theme_bw() +
  ylab("Amount of drugs per Category") + theme(axis.title.y = element_blank())+
  theme(text = element_text(size = 14), axis.text.y = element_text(size = 12))+
  facet_grid(replicate~concentration)

Correlation plots

# Visualize logratio change and efficiency change in one plot to spot outliers
indel.data2 <- indel.data %>% 
  dplyr::select(drug, concentration, replicate, MMEJ_zscore_mean_bio_drug, freqCut_norm_mean_bio_drug) %>%
  filter(drug != "DNA-PKi")
indel.data2 <- unique(indel.data2)

ggplot(indel.data2, aes(x = MMEJ_zscore_mean_bio_drug, y = log2(freqCut_norm_mean_bio_drug))) +
  geom_point(color = ifelse(abs(indel.data2$MMEJ_zscore_mean_bio_drug) > 1, "black", "grey"))+
  xlab("MMEJ z-score") + ylab("Efficiency relative to DMSO (log2)") +
  #gghighlight(abs(mean.MMEJ_zscore.drug) > 5, label_key = drug) + 
  theme_bw() +
  theme(text = element_text(size = 14)) +
  facet_grid(replicate~concentration)

# Correlation plot mean viability vs. mean efficiency
indel.data$mean.viability.drug = 
  ave(indel.data$viability, indel.data$drug, indel.data$concentration, FUN = function(x) mean(x))

indel.data2 <- indel.data %>% 
  dplyr::select(drug, concentration, replicate, mean.viability.drug, freqCut_norm_mean_bio_drug) %>% unique()

ggplot(indel.data2, aes(x = mean.viability.drug, y = log2(freqCut_norm_mean_bio_drug))) +
  geom_point(color = ifelse(abs(log2(indel.data2$freqCut_norm_mean_bio_drug)) > 0.2 , "black", "grey")) +
  xlab("Viability") + ylab("Cutting Efficiency Change") +
  #gghighlight(abs(mean.efficiency_zscore.drug) > 0.1, label_key = drug) + 
  theme_bw() +
  theme(text = element_text(size = 14)) +
  facet_grid(replicate~concentration)

# Correlation plot mean viability vs. mean ratio
indel.data2 <- indel.data %>% 
  dplyr::select(drug, concentration, MMEJ_zscore_mean_bio_drug, mean.viability.drug, replicate) %>% 
  filter(drug != "DNA-PKi") %>% 
  unique()

ggplot(indel.data2, aes(x = MMEJ_zscore_mean_bio_drug, y = mean.viability.drug)) +
  geom_point(color = ifelse(abs(indel.data2$MMEJ_zscore_mean_bio_drug) > 1, "black", "grey")) +
  xlab("MMEJ z-score") + ylab("Viability") +
  #gghighlight(abs(mean.MMEJ_zscore.drug) > 1.5, label_key = drug) + 
  theme_bw() +
  theme(text = element_text(size = 14)) +
  facet_wrap(~concentration, nrow = 1, ncol = 3)

ggplot(indel.data2[indel.data2$drug != "DNA-PKi",], aes(x = MMEJ_zscore_mean_bio_drug, y = mean.viability.drug)) +
  geom_point()+
  xlab("MMEJ z-score") + ylab("Viability")+
  geom_smooth(method = lm, color = "grey") +
  theme(text = element_text(size = 14))+
  facet_wrap(~concentration, nrow = 1, ncol = 3)+
  theme_bw()
## `geom_smooth()` using formula 'y ~ x'

# Do we need a more stringent viability cutoff? 0.7?




# Correlation plots of the replicates
indel.data2 <- indel.data[-grep("DNA-PK|DMSO", indel.data$drug),]
indel.data.rep1 <- indel.data2 %>%
  filter(tech == '1') %>%
  dplyr::select(barcode, replicate, drug, concentration, 'R1' = MMEJ_zscore)
indel.data.rep2 <- indel.data2 %>%
  filter(tech == '2') %>%
  dplyr::select(barcode, replicate, drug, concentration, 'R2' = MMEJ_zscore)
indel.data.rep3 <- indel.data2 %>%
  filter(tech == '3') %>%
  dplyr::select(barcode, replicate, drug, concentration, 'R3' = MMEJ_zscore)

indel.data.rep <- merge(indel.data.rep1, indel.data.rep2, all = T)
indel.data.rep <- merge(indel.data.rep, indel.data.rep3, all = T)

# Correlation matrix plot
correlation.plot.rep1 <- indel.data.rep %>% 
  filter(replicate == "E177") %>%
  dplyr::select(R1, R2, R3) %>% 
  na.omit()

correlation.plot.rep2 <- indel.data.rep %>% 
  filter(replicate == "E1504") %>%
  dplyr::select(R1, R2, R3) %>% 
  na.omit()

n <- sample(1:nrow(indel.data), 5000)
boundaries <- seq(from = 0.7, by = 0.05, length.out = 4)
plt_replicate1 <- ggpairs(correlation.plot.rep1,
               upper = list(continuous = corColor),
               lower = list(continuous = function(data, mapping, ...) {
                   ggally_points(data = data[n, ], mapping = mapping, alpha = 0.1, size = 0.5) +
                   geom_abline(slope = 1, lty = "dashed", col = "red") +
                   theme_bw()}),
               diag = list(continuous = function(data, mapping, ...) {
                   ggally_densityDiag(data = data, mapping = mapping, alpha = 0.3, fill = "red") +
                   theme_bw()})) +
  ggtitle("Correlation Between Technical Replicates - replicate 1") +
  theme(text = element_text(size = 20))+
  xlab("MMEJ z-score") +
  ylab("MMEJ z-score") 
  # theme_bw()

plt_replicate2 <- ggpairs(correlation.plot.rep2,
               upper = list(continuous = corColor),
               lower = list(continuous = function(data, mapping, ...) {
                   ggally_points(data = data[n, ], mapping = mapping, alpha = 0.1, size = 0.5) +
                   geom_abline(slope = 1, lty = "dashed", col = "red") +
                   theme_bw()}),
               diag = list(continuous = function(data, mapping, ...) {
                   ggally_densityDiag(data = data, mapping = mapping, alpha = 0.3, fill = "red") +
                   theme_bw()})) +
  ggtitle("Correlation Between Technical Replicates - replicate 2") +
  theme(text = element_text(size = 20))+
  xlab("MMEJ z-score") +
  ylab("MMEJ z-score") 
  # theme_bw()

print(plt_replicate1)

print(plt_replicate2)

# Correlation between biological replicates
indel.data2 <- indel.data[-grep("DNA-PK|DMSO", indel.data$drug),] %>%
  dplyr::select(MMEJ_zscore_mean_bio_drug, replicate, drug, concentration) %>%
  unique() %>%
  spread(key = replicate, value = MMEJ_zscore_mean_bio_drug)

sp <- ggscatter(indel.data2, x = "E177", y = "E1504",
                size = 1.5, add = "reg.line",
   add.params = list(color = "blue", fill = "lightgray"), title = "mean drug effects on MMEJ-zscore rep 1 vs. rep 2",
   conf.int = TRUE, ylab = "rep 2", xlab = "rep 1")

sp + stat_cor(method = "pearson", label.x = -2, label.y = 1) + 
  facet_wrap(~concentration) +theme_bw() 
## `geom_smooth()` using formula 'y ~ x'

# Correlation between biological replicates - only DMSO - all integrations
indel.data2 <- indel.data[grep("DMSO", indel.data$drug),] %>%
  dplyr::select(freqCut, replicate, drug, concentration, integration, tech, drug_plate, well) %>%
  unique() %>%
  spread(key = replicate, value = freqCut)

sp <- ggscatter(indel.data2, x = "E177", y = "E1504",
                size = 1.5, add = "reg.line",
   add.params = list(color = "blue", fill = "lightgray"), title = "mean drug effects on efficiency rep 1 vs. rep 2",
   conf.int = TRUE, ylab = "rep 2", xlab = "rep 1")

sp + stat_cor(method = "pearson", label.x = 0.8, label.y = 0.7) + theme_bw() 
## `geom_smooth()` using formula 'y ~ x'

Locally vs. globally acting drugs

# Plotting efficiency per drug per integration
ggplot(indel.data %>%
         filter(drug != "DNA-PKi", target != "Negative Control", target != "MRN") %>%
         mutate(freqCut_mean = ave(freqCut, integration, drug, concentration, FUN = function(x) mean(x))) %>% 
         dplyr::select(freqCut_mean, integration, drug, concentration) %>% unique(), aes(x = integration, y = freqCut_mean)) +
  geom_quasirandom() +
  theme_bw() +
  facet_wrap(~concentration) +
  coord_flip()

# Plotting MMEJscore per drug per integration
ggplot(indel.data %>%
         filter(drug != "DNA-PKi", target != "Negative Control", target != "MRN") %>%
         dplyr::select(MMEJ_zscore_mean, integration, drug) %>% unique(), aes(x = integration, y = MMEJ_zscore_mean)) +
  geom_quasirandom() +
  theme_bw() +
  coord_flip()

# Plotting the MMEJ z-score against the SD in the MMEJ z-score between the 18 IPRs
ggplot(indel.data %>%
         filter(drug != "DNA-PKi", target != "Negative Control", target != "MRN") %>%
         mutate(MMEJ_zscore_sd_bio_drug = ave(MMEJ_zscore_mean_bio_drug, drug, concentration, FUN = function(x) sd(x))) %>%
         dplyr::select(MMEJ_zscore_mean_bio_drug, MMEJ_zscore_sd_bio_drug, drug, concentration, replicate) %>%
         unique(),
       aes(x = MMEJ_zscore_mean_bio_drug, y = MMEJ_zscore_sd_bio_drug, 
           color = ifelse(abs(MMEJ_zscore_mean_bio_drug) >= 0.5 & MMEJ_zscore_sd_bio_drug > 0.15, "red", "black"))) +
  geom_point() +
  geom_hline(yintercept = 0.15, linetype = "dashed", alpha = 0.3) +
  geom_vline(xintercept = 0.5, linetype = "dashed", alpha = 0.3) +
  geom_vline(xintercept = -0.5, linetype = "dashed", alpha = 0.3) +
  theme_bw() +
  xlab("MMEJ z-score per drug") +
  ylab("MMEJ z-score sd of integrations") +
  facet_grid(replicate~concentration) +
  scale_color_identity()

ggplot(indel.data %>%
         filter(drug != "DNA-PKi", target != "Negative Control", target != "MRN") %>%
         mutate(MMEJ_zscore_sd_bio_drug = ave(MMEJ_zscore_mean_bio_drug, drug, concentration, FUN = function(x) sd(x))) %>%
         dplyr::select(MMEJ_zscore_mean_bio_drug, MMEJ_zscore_sd_bio_drug, drug, concentration, replicate, target) %>%
         unique(),
       aes(x = MMEJ_zscore_mean_bio_drug, y = MMEJ_zscore_sd_bio_drug, 
           color = ifelse(abs(MMEJ_zscore_mean_bio_drug) >= 0.5 & MMEJ_zscore_sd_bio_drug > 0.15, "red", "black"))) +
  geom_point() +
  geom_hline(yintercept = 0.15, linetype = "dashed", alpha = 0.3) +
  geom_vline(xintercept = 0.5, linetype = "dashed", alpha = 0.3) +
  geom_vline(xintercept = -0.5, linetype = "dashed", alpha = 0.3) +
    theme_bw() +
    xlab("efficiency per drug") +
    ylab("efficiency sd of integrations") +
    facet_grid(concentration~target) +
    scale_color_identity()

indel.data$local <- "No"
indel.data$MMEJ_zscore_sd_bio_drug <- ave(indel.data$MMEJ_zscore_mean_bio_drug, indel.data$drug, indel.data$concentration, FUN = function(x) sd(x))
indel.data$local[abs(indel.data$MMEJ_zscore_mean_bio_drug) >= 0.25 & indel.data$MMEJ_zscore_sd_bio_drug > 0.1] <- "Yes"




indel.data2 <- indel.data %>% 
  dplyr::select(drug, concentration, target, local, replicate) %>%
  unique()
indel.data2 <- indel.data2 %>%
  mutate(size = ave(target, target, concentration, local, FUN = function(x) length(x)),
         total_size = ave(target, target, concentration, FUN = function(x) length(x)))


ggplot(indel.data2 %>%
         dplyr::select(target, size, local, concentration, total_size, replicate) %>%
         unique(), aes(x = reorder(target, as.numeric(total_size)), 
                   y = as.numeric(size), 
                       fill = local)) + 
      geom_bar(stat="identity") +
    labs(fill = "locally acting drugs")+
  scale_fill_manual(values = colors_diverse[seq(1, length(colors_diverse), 2)])+
  coord_flip()+ theme_bw() +
  ylab("Amount of drugs per Category") + theme(axis.title.y = element_blank())+
  theme(text = element_text(size = 14), axis.text.y = element_text(size = 12))+
  facet_grid(replicate~concentration)

ggplot(indel.data %>%
         filter(drug != "DNA-PKi", target != "Negative Control", target != "MRN") %>%
         mutate(freqCut_norm_sd_bio_drug = ave(freqCut_norm_mean_bio_drug, drug, concentration, FUN = function(x) sd(x))) %>%
         dplyr::select(freqCut_norm_sd_bio_drug, freqCut_norm_mean_bio_drug, drug, concentration, replicate) %>%
         unique(),
       aes(x = freqCut_norm_mean_bio_drug, y = freqCut_norm_sd_bio_drug, 
           color = ifelse(freqCut_norm_sd_bio_drug > 0.02, "red", "black"))) +
  geom_point() +
  geom_hline(yintercept = 0.02, linetype = "dashed", alpha = 0.3) +
  theme_bw() +
  xlab("efficiency per drug") +
  ylab("efficiency sd of integrations") +
  facet_grid(replicate~concentration) +
  scale_color_identity()

ggplot(indel.data %>%
           filter(drug != "DNA-PKi", target != "Negative Control", target != "MRN") %>%
           mutate(freqCut_norm_sd_bio_drug = ave(freqCut_norm_mean_bio_drug, drug, concentration, FUN = function(x) sd(x))) %>%
           dplyr::select(freqCut_norm_sd_bio_drug, freqCut_norm_mean_bio_drug, drug, concentration, replicate, target) %>%
           unique(),
       aes(x = freqCut_norm_mean_bio_drug, y = freqCut_norm_sd_bio_drug, 
           color = ifelse(freqCut_norm_sd_bio_drug > 0.02, "red", "black"))) +
    geom_quasirandom() +
    geom_hline(yintercept = 0.02, linetype = "dashed", alpha = 0.3) +
    theme_bw() +
    xlab("efficiency per drug") +
    ylab("efficiency sd of integrations") +
    facet_grid(concentration~target) +
    scale_color_identity()

Spotting outliers

# # Spot individual drug outliers per concentration
# indel.data2 <- indel.data
# indel.data2$MMEJ_zscore <- ave(indel.data2$MMEJ_zscore, indel.data2$concentration, indel.data2$drug, FUN = function(x) mean(x))
# indel.data2 <- indel.data2 %>% dplyr::select(drug, concentration, MMEJ_zscore)
# indel.data2 <- indel.data2[!duplicated(indel.data2),]
# indel.data2$concentration <- factor(indel.data2$concentration, levels=c("100nm", "1um", "10um"))
# 
# ggplot(indel.data2 %>%
#          filter(drug != "DNA-PKi"), aes(x=concentration, y=MMEJ_zscore)) +
#   geom_quasirandom() + 
#   gghighlight(abs(MMEJ_zscore) > 6, label_key = drug) + theme_bw()
#   
# # The same but only the range between 1 & 2
# ggplot(indel.data2[abs(indel.data2$MMEJ_zscore) < 2,], aes(x=concentration, y=MMEJ_zscore)) +
#   geom_quasirandom() + 
#   gghighlight(abs(MMEJ_zscore) > 1.7, label_key = drug) + theme_bw()
# 
# # Spot outliers by plotting DMSO versus drugs
# indel.data2 <- indel.data
# indel.data2$mean.ratio.bc.drug.conc <- ave(indel.data2$efficiency, 
#                                            indel.data2$concentration, indel.data2$drug, 
#                                            indel.data2$barcode, FUN = function(x) mean(x))
# indel.data2 <- indel.data2 %>% dplyr::select(barcode, drug, concentration, mean.ratio.bc.drug.conc)
# indel.data2 <- indel.data2[!duplicated(indel.data2),]
# indel.data2.high <- indel.data2[grep("1um", indel.data2$concentration),]
# indel.data2.high <- indel.data2.high[,-3]
# 
# indel.data2.high <- dcast(indel.data2.high, barcode ~ drug, value.var = "mean.ratio.bc.drug.conc")
# 
# 
# gghighlight_point(indel.data2.high, aes(x = DMSO, y = `Vorinostat (SAHA, MK0683)`), `Vorinostat (SAHA, MK0683)` - DMSO > 0.1) +
#   geom_abline(intercept = 0, slope = 1) +
#   geom_abline(intercept = 0.1, slope = 1, linetype = "dashed") +
#   geom_abline(intercept = - 0.1, slope = 1, linetype = "dashed") +
#   geom_text(aes(x = 0.4, y = 0.32, label = "-10% efficiency", angle = 35)) +
#   geom_text(aes(x = 0.4, y = 0.52, label = "+10% efficiency", angle = 35)) +
#   scale_x_continuous(limits = c(0.25, 1)) +
#   scale_y_continuous(limits = c(0.25, 1)) +
#   labs(title = "Efficiency compared to DMSO for each integration") + theme_bw()
# 
# # Make a beeswarm plot with alternative colors for outliers based on chi square scores
# ggplot(data = indel.data) +
#   geom_quasirandom(mapping = 
#                      aes(x = reorder(integration, order), y = logratio.mean, 
#                          colour = ifelse(p.value > 0.05,
#                                          "non-outlier","outlier")), alpha = 0.5) + theme_bw() +
#   theme(axis.text.x = element_text(angle = 90, hjust = 1.0, vjust = 0)) + 
#   ylab("log2(+1/-7 ratio)") + xlab("integration") + 
#   labs(title = "Outlier plot", colour = "Outlier based on p value < 0.05") + 
#   scale_color_manual(values=c("#999999", "#E69F00"))
# 
# 
# 
# # Boxplot of logratio 
# indel.data$cluster <- "Active Promoter/Enhancer" 
# indel.data$cluster[indel.data$order == "3" | 
#                       indel.data$order == "4"] <- "H3K9me2/3 Domain"
# indel.data$cluster[indel.data$order == "5" | 
#                       indel.data$order == "6" |
#                       indel.data$order == "7" | 
#                       indel.data$order == "8"] <- "Active Gene Body"
# indel.data$cluster[indel.data$order == "9" | 
#                       indel.data$order == "10" |
#                       indel.data$order == "11" | 
#                       indel.data$order == "12"] <- "LAD"
# indel.data$cluster[indel.data$order == "13" | 
#                       indel.data$order == "14"] <- "No Chromatin Marks"
# indel.data$cluster[indel.data$order == "15" | 
#                       indel.data$order == "16" |
#                       indel.data$order == "17" | 
#                       indel.data$order == "18"] <- "H3K27me3 Domain"
# 
# ggplot(data = indel.data[indel.data$drug != "DMSO",],
#        aes(x = reorder(integration, order),
#            y = efficiency_zscore)) +
#   geom_quasirandom(color = ifelse(indel.data[indel.data$drug != "DMSO",]$efficiency_zscore > 2 | 
#                                     indel.data[indel.data$drug != "DMSO",]$efficiency_zscore < -2, 
#                                   "#343a40", "#6c757d")) +
#   ylab("MMEJ score")+
#   scale_fill_brewer(palette = "Pastel2")+
#   xlab("") +
#   theme_bw()+
#   theme(axis.text.x = element_text(angle = 90, hjust = 1.0, vjust = 0))+
#   theme(axis.title.x = element_blank()) +
#   coord_flip()
# 
# # Are the logratios of the different barcode clusters different?
# indel.data2$int <- "1"
# indel.data2$int[indel.data2$order >= 9] <- "2"
# indel.data2$logratio <- ave(indel.data2$logratio, 
#                             indel.data2$integration, 
#                             indel.data2$rep, 
#                             FUN = function(x) mean(x))
# indel.data2 <- indel.data2 %>% dplyr::select(rep, logratio, int, integration) %>% unique()
# indel.data2$mean <- ave(indel.data2$logratio,
#                         indel.data2$int, 
#                         indel.data2$rep,
#                         FUN = function(x) mean(x))
# indel.data2$sd <- ave(indel.data2$logratio, 
#                       indel.data2$int, 
#                       indel.data2$rep,
#                       FUN = function(x) sd(x))
# indel.data2$p.value <- t.test(indel.data2$mean[indel.data2$int == 1], indel.data2$mean[indel.data2$int == 2])$p.value
# indel.data2$mean.bio <- ave(indel.data2$mean,
#                         indel.data2$int, 
#                         FUN = function(x) mean(x))
# indel.data2$sd.bio <- ave(indel.data2$logratio, 
#                       indel.data2$int, 
#                       FUN = function(x) sd(x))
# 
# 
# # Boxplot of efficiencies normalized
# ggplot(data = indel.data[indel.data$drug == "DMSO",]) +
#   geom_boxplot(mapping = aes(x = reorder(integration,order), y = efficiency.platenorm)) + theme_bw() +
#   theme(axis.text.x = element_text(angle = 90, hjust = 1.0, vjust = 0)) + 
#   labs(title = "Efficiency distribution per integration after normalization") 
# 
# # Beeswarm plot of efficiencies
# ggplot(data = indel.data) +
#   geom_quasirandom(mapping = 
#                      aes(x = reorder(integration,order), y = efficiency.mean, 
#                          colour = ifelse(p.value > 0.05,
#                                          "non-outlier","outlier"))) + theme_bw() +
#   theme(axis.text.x = element_text(angle = 90, hjust = 1.0, vjust = 0)) + 
#   ylab("efficiency") + xlab("integration") + labs(title = "Efficiency plot, outliers (based on logratio) in yellow", colour = "Logratio outlier based on p value < 0.05") + scale_color_manual(values=c("#999999", "#E69F00"))
# 
# 
# # Make a loop of plots, plotting the ratio per drug target group together with DMSO
# for (i in unique(indel.data$target)) {
#   data <- indel.data[indel.data$target == i | indel.data$target == "Negative Control", ]
#   p <- ggplot(data = data) +
#     geom_quasirandom(mapping = aes(x = reorder(integration,order), y = MMEJ_zscore, colour = target), alpha = 0.8, dodge.width = -0.5) + theme_bw() +
#     theme(axis.text.x = element_text(angle = 90, hjust = 1.0, vjust = 0))+ 
#     ylab("log2(+1/-7 ratio)") + xlab("integration")+ labs(title= paste("logratio of",i, "compared to DMSO"))
#   print(p)
# }
# 
# 
# # Aurora Kinase only
# indel.data.med <- indel.data[indel.data$concentration == "1um",]
# aurora.dmso <- indel.data.med[indel.data.med$drug == "DMSO" | indel.data.med$target == "Aurora Kinase",]
# aurora.dmso$target <- factor(aurora.dmso$target, levels = c("Negative Control", "Aurora Kinase"))
# ggplot(data =  aurora.dmso) +
#     geom_quasirandom(aes(x = reorder(integration,order), y = MMEJ_zscore, colour = target, group = target), 
#                      alpha = 0.8, dodge.width = -0.5, width = 0.1) + 
#   scale_colour_brewer(palette = "Set2")+
#   coord_flip()+
#   theme(axis.title.y = element_blank())+
#     ylab("Change log2(+1/-7 ratio)")+
#   theme(text = element_text(size = 14))+
#   geom_hline(yintercept = 1, linetype = "dashed", alpha = 0.5)+
#   theme_classic2()
# 
# 
# # Decitabine only
# decitabine.dmso <- indel.data[indel.data$drug == "DMSO" | 
#                                 indel.data$drug == "Decitabine",]
# decitabine.dmso$drug <- factor(decitabine.dmso$drug, levels = c("DMSO", "Decitabine"))
# decitabine.dmso$concentration <- factor(decitabine.dmso$concentration, levels = c("100nm", "1um", "10um"))
# 
# ggplot(decitabine.dmso, aes(x = reorder(integration,order), 
#                             y = MMEJ_zscore, 
#                             colour = drug, group = drug,
#                             dodge.width = -0.5,
#                             width = 0.1)) +
#     geom_quasirandom() + 
#   scale_colour_brewer(palette = "Set2") +
#   coord_flip() +
#   theme(axis.title.y = element_blank()) +
#   ylab("MMEJ z-score") +
#   xlab("") +
#   theme(text = element_text(size = 14)) +
#   geom_hline(yintercept = 0, linetype = "dashed", alpha = 0.5) +
#   theme_bw() + facet_wrap(~concentration)
# 
# # HDACs (efficiency and logratio)
# hdac.dmso <- indel.data[indel.data$drug == "Vorinostat (SAHA, MK0683)"|
#                           indel.data$drug == "Givinostat (ITF2357)"|
#                           indel.data$drug == "PCI-24781 (Abexinostat)"|
#                           indel.data$drug == "AR-42"|
#                           indel.data$drug == "Scriptaid ",]
# hdac.dmso <- hdac.dmso[hdac.dmso$concentration == "1um",]
# 
# hdac.dmso$MMEJ_zscore <- ave(hdac.dmso$MMEJ_zscore, 
#                              hdac.dmso$barcode, hdac.dmso$drug,
#                              FUN = function(x) mean(x))
# hdac.dmso$efficiency_zscore <- ave(hdac.dmso$efficiency_zscore, 
#                                     hdac.dmso$barcode, hdac.dmso$drug,
#                                     FUN = function(x) mean(x))
# 
# 
# hdac.dmso <- hdac.dmso %>% dplyr::select(drug, MMEJ_zscore, 
#                                   efficiency_zscore, order, 
#                                   integration) %>% unique()
# hdac.dmso$mean <- ave(hdac.dmso$MMEJ_zscore, hdac.dmso$integration,
#                       FUN = function(x) mean(x))
# hdac.dmso$mean2 <- ave(hdac.dmso$efficiency_zscore, hdac.dmso$integration,
#                       FUN = function(x) mean(x))
# 
# ggplot() +
#   geom_bar(data = hdac.dmso %>%
#              dplyr::select(mean, integration, order) %>%
#              unique(), stat="identity", aes(x =  reorder(integration,order), y = mean, alpha = 0.2)) +
#   geom_point(data = hdac.dmso, aes(x = reorder(integration,order), y = MMEJ_zscore, color = drug)) + 
#   scale_colour_brewer(palette = "Set2")+
#   coord_flip()+
#   theme(axis.title.y = element_blank())+
#   ylab("MMEJ z-score")+
#   xlab("") +
#   theme(text = element_text(size = 14))+
#   theme_bw()
# 
# ggplot() +
#   geom_bar(data = hdac.dmso %>%
#              dplyr::select(mean2, integration, order) %>%
#              unique(), stat="identity", aes(x =  reorder(integration,order), y = mean2, alpha = 0.2)) +
#   geom_point(data = hdac.dmso, aes(x = reorder(integration,order), y = efficiency_zscore, color = drug)) + 
#   scale_colour_brewer(palette = "Set2")+
#   coord_flip()+
#   theme(axis.title.y = element_blank())+
#   ylab("efficiency z-score")+
#   xlab("") +
#   theme(text = element_text(size = 14))+
#   theme_bw()
# 
# # Statistical significance? Is integration 17 different?
# anova <- aov(MMEJ_zscore ~ integration, data = hdac.dmso)
# summary.aov(anova)
# tukey <- TukeyHSD(anova)
# 
# # Make plots for all HDACs at the three concentrations
# indel.data$concentration <- factor(indel.data$concentration, levels = c("100nm", "1um", "10um"))
# ggplot(indel.data %>%
#          filter(target == "HDAC" | drug == "DMSO"),
#        aes(x = reorder(integration, order), y = efficiency_zscore, color = target)) +
#   geom_quasirandom(dodge.width = 0.75) +
#   geom_boxplot(position = position_dodge(0.75), alpha = 0.4) +
#   theme_bw() +
#   xlab("") +
#   scale_color_manual(values = colors_diverse) +
#   facet_wrap(~concentration) +
#   coord_flip()
# 
# # only at the specific domains
# indel.data$cluster2 <- "open" 
# indel.data$cluster2[indel.data$order == "9" | 
#                       indel.data$order == "10" |
#                       indel.data$order == "11" | 
#                       indel.data$order == "12" |
#                      indel.data$order == "13" | 
#                       indel.data$order == "14" |
#                      indel.data$order == "15" | 
#                       indel.data$order == "16" |
#                       indel.data$order == "17" | 
#                       indel.data$order == "18"] <- "condensed"
# 
# # Include detailed HDAC annotation
# hdac_annotation <- read.csv2("/DATA/usr/m.trauernicht/projects/EpiScreen/files_scripts/HDAC_categories.csv") %>%
#   dplyr::select(drug, Categorie)
# indel.data.hdac <- indel.data %>%
#   filter(target == "HDAC" | drug == "DMSO")
# indel.data.hdac <- merge(indel.data.hdac, hdac_annotation, all = T)
# 
# ggplot(indel.data.hdac[!is.na(indel.data.hdac$concentration),] %>%
#          filter(Categorie %in% c("HDAC1", "HDAC3", "DMSO")),
#        aes(x = Categorie, y = efficiency_zscore, color = cluster2)) +
#   geom_quasirandom(dodge.width = 0.75) +
#   geom_boxplot(position = position_dodge(0.75), alpha = 0.4) +
#   theme_bw() +
#   theme(axis.text.x = element_text(angle = 90, hjust = 1.0, vjust = 0.5)) + 
#   xlab("") +
#   scale_color_manual(values = colors_diverse) +
#   facet_wrap(~concentration)
setwd("/DATA/usr/m.trauernicht/projects/EpiScreen/files_scripts/")
filename <- SetFileName("_indel.data", "mt")
save(indel.data, file = filename)

Session Info

paste("Run time: ",format(Sys.time()-StartTime))
## [1] "Run time:  1.00937 mins"
getwd()
## [1] "/DATA/usr/m.trauernicht/projects/EpiScreen"
date()
## [1] "Fri Jun  4 17:55:52 2021"
sessionInfo()
## R version 4.0.5 (2021-03-31)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.2 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3
## LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/liblapack.so.3
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] parallel  stats4    stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] Laurae_0.0.0.9001           tidyr_1.1.3                
##  [3] ggpubr_0.4.0                platetools_0.1.3           
##  [5] gghighlight_0.3.1           DESeq2_1.30.1              
##  [7] SummarizedExperiment_1.20.0 Biobase_2.50.0             
##  [9] MatrixGenerics_1.2.1        matrixStats_0.58.0         
## [11] GenomicRanges_1.42.0        GenomeInfoDb_1.26.7        
## [13] IRanges_2.24.1              S4Vectors_0.28.1           
## [15] BiocGenerics_0.36.1         GGally_2.1.1               
## [17] ggrepel_0.9.1               data.table_1.14.0          
## [19] ggbeeswarm_0.6.0            ggplot2_3.3.3              
## [21] outliers_0.14               dplyr_1.0.5                
## 
## loaded via a namespace (and not attached):
##  [1] colorspace_2.0-0       ggsignif_0.6.1         ellipsis_0.3.2        
##  [4] rio_0.5.26             XVector_0.30.0         rstudioapi_0.13       
##  [7] farver_2.1.0           bit64_4.0.5            AnnotationDbi_1.52.0  
## [10] fansi_0.4.2            splines_4.0.5          cachem_1.0.4          
## [13] geneplotter_1.68.0     knitr_1.33             jsonlite_1.7.2        
## [16] broom_0.7.6            annotate_1.68.0        compiler_4.0.5        
## [19] httr_1.4.2             backports_1.2.1        assertthat_0.2.1      
## [22] Matrix_1.3-2           fastmap_1.1.0          cli_2.5.0             
## [25] htmltools_0.5.1.1      tools_4.0.5            gtable_0.3.0          
## [28] glue_1.4.2             GenomeInfoDbData_1.2.4 reshape2_1.4.4        
## [31] Rcpp_1.0.6             carData_3.0-4          cellranger_1.1.0      
## [34] jquerylib_0.1.4        vctrs_0.3.8            nlme_3.1-152          
## [37] xfun_0.22              stringr_1.4.0          openxlsx_4.2.3        
## [40] lifecycle_1.0.0        rstatix_0.7.0          XML_3.99-0.6          
## [43] zlibbioc_1.36.0        scales_1.1.1           hms_1.0.0             
## [46] RColorBrewer_1.1-2     yaml_2.2.1             curl_4.3              
## [49] memoise_2.0.0          sass_0.3.1             reshape_0.8.8         
## [52] stringi_1.5.3          RSQLite_2.2.7          highr_0.9             
## [55] genefilter_1.72.1      zip_2.1.1              BiocParallel_1.24.1   
## [58] rlang_0.4.10           pkgconfig_2.0.3        bitops_1.0-7          
## [61] evaluate_0.14          lattice_0.20-41        purrr_0.3.4           
## [64] labeling_0.4.2         bit_4.0.4              tidyselect_1.1.1      
## [67] plyr_1.8.6             magrittr_2.0.1         R6_2.5.0              
## [70] generics_0.1.0         DelayedArray_0.16.3    DBI_1.1.1             
## [73] pillar_1.6.0           haven_2.4.1            foreign_0.8-81        
## [76] withr_2.4.2            mgcv_1.8-34            survival_3.2-10       
## [79] abind_1.4-5            RCurl_1.98-1.3         tibble_3.1.1          
## [82] crayon_1.4.1           car_3.0-10             utf8_1.2.1            
## [85] rmarkdown_2.7          locfit_1.5-9.4         grid_4.0.5            
## [88] readxl_1.3.1           blob_1.2.1             forcats_0.5.1         
## [91] digest_0.6.27          xtable_1.8-4           munsell_0.5.0         
## [94] beeswarm_0.3.1         vipor_0.4.5            bslib_0.2.4